Nonlinear growth models represent an instance of nonlinear regression models, a class of models taking the general form \[ y = \mu(x, \theta) + \epsilon, \] where \(\mu(x, \theta)\) is the mean function which depends on a possibly vector-valued parameter \(\theta\), and a possibly vector-valued predictor \(x\). The stochastic component \(\epsilon\) represents the error with mean zero and constant variance. Usually, a Gaussian distribution is also assumed for the error term.
By defining the mean function \(\mu(x, \theta)\) we may obtain several different models, all characterized by the fact that parameters \(\theta\) enter in a nonlinear way into the equation. Parameters are usually estimated by nonlinear least squares which aims at minimizing the residual sum of squares.
\[ \mu(x) = \theta_1 \exp\{\theta_2 x\} \] where \(\theta_1\) is the value at the origin (i.e. \(\mu(x=0)\)), and \(\theta_2\) represents the (constant) relative ratio of change (i.e. \(\frac{d\mu(x)}{dx }\frac{1}{\mu(x)} = \theta_2\)). Thus, the model describes an increasing (exponential growth if \(\theta_2 > 0\)) or decreasing (exponential decay if \(\theta_2 < 0\)) trend with constant relative rate.
\[ \mu(x) = \frac{\theta_1}{1+\exp\{(\theta_2 - x)/\theta_3\}} \] where \(\theta_1\) is the upper horizontal asymptote, \(\theta_2\) represents the x-value at the inflection point of the symmetric growth curve, and \(\theta_3\) represents a scale parameter (and \(1/\theta_3\) is the growth-rate parameter that controls how quickly the curve approaches the upper asymptote).
\[ \mu(x) = \theta_1 \exp\{-\theta_2 \theta_3^x\} \] where \(\theta_1\) is the horizontal asymptote, \(\theta_2\) represents the value of the function at \(x = 0\) (displacement along the x-axis), and \(\theta_3\) represents a scale parameter.
The difference between the logistic and Gompertz functions is that the latter is not symmetric around the inflection point.
\[ \mu(x) = \theta_1 (1 - \exp\{-\theta_2 x\})^{\theta_3} \] where \(\theta_1\) is the horizontal asymptote, \(\theta_2\) represents the rate of growth, and \(\theta_3\) in part determines the point of inflection on the y-axis.
Dipartimento della Protezione Civile: COVID-19 Italia - Monitoraggio della situazione http://arcg.is/C1unv
Source: https://github.com/pcm-dpc/COVID-19
url = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"
COVID19 <- read.csv(file = url, stringsAsFactors = FALSE)
COVID19$data <- as.Date(COVID19$data)
# DT::datatable(COVID19)Warnings
- 29/03/2020: dati Regione Emilia Romagna parziali (dato tampone non aggiornato).
- 26/03/2020: dati Regione Piemonte parziali (-50 deceduti - comunicazione tardiva)
- 18/03/2020: dati Regione Campania non pervenuti.
- 18/03/2020: dati Provincia di Parma non pervenuti.
- 17/03/2020: dati Provincia di Rimini non aggiornati
- 16/03/2020: dati P.A. Trento e Puglia non pervenuti.
- 11/03/2020: dati Regione Abruzzo non pervenuti.
- 10/03/2020: dati Regione Lombardia parziali.
- 07/03/2020: dati Brescia +300 esiti positivi
# create data for analysis
data = data.frame(date = COVID19$data,
y = COVID19$totale_casi)
data$x = as.numeric(data$date) - min(as.numeric(data$date)) + 1
DT::datatable(data, options = list("pageLength" = 5))mod1_start = lm(log(y) ~ x, data = data)
b = unname(coef(mod1_start))
start = list(th1 = exp(b[1]), th2 = b[2])
exponential <- function(x, th1, th2) th1 * exp(th2 * x)
mod1 = nls(y ~ exponential(x, th1, th2), data = data, start = start)
summary(mod1)
##
## Formula: y ~ exponential(x, th1, th2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## th1 6758.718857 913.938740 7.395 0.00000000398 ***
## th2 0.071650 0.003541 20.233 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10280 on 42 degrees of freedom
##
## Number of iterations to convergence: 10
## Achieved convergence tolerance: 0.000009694mod2 = nls(y ~ SSlogis(x, Asym, xmid, scal), data = data)
summary(mod2)
##
## Formula: y ~ SSlogis(x, Asym, xmid, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 147850.4983 1618.4196 91.36 <2e-16 ***
## xmid 30.8837 0.1823 169.42 <2e-16 ***
## scal 6.0388 0.1008 59.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1408 on 41 degrees of freedom
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 0.0000002824mod3 = nls(y ~ SSgompertz(x, Asym, b2, b3), data = data)
# start = list(Asym = coef(mod2)[1])
# tmp = list(y = log(log(start$Asym) - log(data$y)), x = data$x)
# b = unname(coef(lm(y ~ x, data = tmp)))
# start = c(start, c(b2 = exp(b[1]), b3 = exp(b[2])))
# mod3 = nls(y ~ SSgompertz(x, Asym, b2, b3), data = data, start = start,
# control = nls.control(maxiter = 1000))
summary(mod3)
##
## Formula: y ~ SSgompertz(x, Asym, b2, b3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 196687.320590 3441.169949 57.16 <2e-16 ***
## b2 10.025135 0.271430 36.93 <2e-16 ***
## b3 0.927422 0.001382 670.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1020 on 41 degrees of freedom
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 0.0000007416richards <- function(x, th1, th2, th3) th1*(1 - exp(-th2*x))^th3
Loss <- function(th, y, x) sum((y - richards(x, th[1], th[2], th[3]))^2)
start <- optim(par = c(coef(mod2)[1], 0.001, 1), fn = Loss,
y = data$y, x = data$x)$par
names(start) <- c("th1", "th2", "th3")
mod4 = nls(y ~ richards(x, th1, th2, th3), data = data, start = start,
# trace = TRUE, algorithm = "plinear",
control = nls.control(maxiter = 1000, tol = 0.1))
# algorithm is not converging...
summary(mod4)
##
## Formula: y ~ richards(x, th1, th2, th3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## th1 214735.824697 7005.475001 30.65 <2e-16 ***
## th2 0.062719 0.002531 24.78 <2e-16 ***
## th3 6.865356 0.359753 19.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1330 on 41 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 0.08979
# library(nlmrt)
# mod4 = nlxb(y ~ th1*(1 - exp(-th2*x))^th3,
# data = data, start = start, trace = TRUE)models = list("Exponential model" = mod1,
"Logistic model" = mod2,
"Gompertz model" = mod3,
"Richards model" = mod4)
tab = data.frame(loglik = sapply(models, logLik),
df = sapply(models, function(m) attr(logLik(m), "df")),
Rsquare = sapply(models, function(m)
cor(data$y, fitted(m))^2),
AIC = sapply(models, AIC),
AICc = sapply(models, AICc),
BIC = sapply(models, BIC))
sel <- apply(tab[,4:6], 2, which.min)
tab$"" <- sapply(tabulate(sel, nbins = length(models))+1, symnum,
cutpoints = 0:4, symbols = c("", "*", "**", "***"))
knitr::kable(tab)| loglik | df | Rsquare | AIC | AICc | BIC | ||
|---|---|---|---|---|---|---|---|
| Exponential model | -467.8758 | 3 | 0.9591878 | 941.7516 | 942.3516 | 947.1042 | |
| Logistic model | -379.8783 | 4 | 0.9992512 | 767.7565 | 768.7822 | 774.8933 | |
| Gompertz model | -365.6777 | 4 | 0.9996052 | 739.3554 | 740.3811 | 746.4922 | *** |
| Richards model | -377.3526 | 4 | 0.9993631 | 762.7053 | 763.7309 | 769.8420 |
ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(aes(y = fitted(mod1), color = "Exponential")) +
geom_line(aes(y = fitted(mod2), color = "Logistic")) +
geom_line(aes(y = fitted(mod3), color = "Gompertz")) +
geom_line(aes(y = fitted(mod4), color = "Richards")) +
labs(x = "", y = "Infected", color = "Model") +
scale_color_manual(values = cols) +
scale_y_continuous(breaks = seq(0, coef(mod2)[1], by = 10000),
minor_breaks = seq(0, coef(mod2)[1], by = 5000)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))last_plot() +
scale_y_continuous(trans = "log10", limits = c(100,NA)) +
labs(y = "Infected (log10 scale)")df = data.frame(x = seq(min(data$x), max(data$x)+14))
df = cbind(df, date = as.Date(df$x, origin = data$date[1]-1),
fit1 = predict(mod1, newdata = df),
fit2 = predict(mod2, newdata = df),
fit3 = predict(mod3, newdata = df),
fit4 = predict(mod4, newdata = df))
ylim = c(0, max(df[,c("fit2", "fit3")]))ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(data = df, aes(x = date, y = fit1, color = "Exponential")) +
geom_line(data = df, aes(x = date, y = fit2, color = "Logistic")) +
geom_line(data = df, aes(x = date, y = fit3, color = "Gompertz")) +
geom_line(data = df, aes(x = date, y = fit4, color = "Richards")) +
coord_cartesian(ylim = ylim) +
labs(x = "", y = "Infected", color = "Model") +
scale_y_continuous(breaks = seq(0, max(ylim), by = 10000),
minor_breaks = seq(0, max(ylim), by = 5000)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
scale_color_manual(values = cols) +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))# compute prediction using Moving Block Bootstrap (MBB) for nls
df = data.frame(x = seq(min(data$x), max(data$x)+14))
df = cbind(df, date = as.Date(df$x, origin = data$date[1]-1))
pred1 = cbind(df, "fit" = predict(mod1, newdata = df))
pred1[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod1, df[df$x > max(data$x),])[,2:3]
pred2 = cbind(df, "fit" = predict(mod2, newdata = df))
pred2[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod2, df[df$x > max(data$x),])[,2:3]
pred3 = cbind(df, "fit" = predict(mod3, newdata = df))
pred3[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod3, df[df$x > max(data$x),])[,2:3]
pred4 = cbind(df, "fit" = predict(mod4, newdata = df))
pred4[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod4, df[df$x > max(data$x),])[,2:3]
# predictions for next day
pred = rbind(subset(pred1, x == max(data$x)+1, select = 2:5),
subset(pred2, x == max(data$x)+1, select = 2:5),
subset(pred3, x == max(data$x)+1, select = 2:5),
subset(pred4, x == max(data$x)+1, select = 2:5))
print(pred, digits = 3)
## date fit lwr upr
## 45 2020-04-08 169878 140024 200333
## 451 2020-04-08 134831 131209 137708
## 452 2020-04-08 140313 137623 143208
## 453 2020-04-08 140965 137519 145191
ylim = c(0, max(pred2$upr, pred3$upr, na.rm=TRUE))ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(data = pred1, aes(x = date, y = fit, color = "Exponential")) +
geom_line(data = pred2, aes(x = date, y = fit, color = "Logistic")) +
geom_line(data = pred3, aes(x = date, y = fit, color = "Gompertz")) +
geom_line(data = pred4, aes(x = date, y = fit, color = "Richards")) +
geom_ribbon(data = pred1, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[1], alpha=0.3) +
geom_ribbon(data = pred2, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[2], alpha=0.3) +
geom_ribbon(data = pred3, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[3], alpha=0.3) +
geom_ribbon(data = pred4, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[4], alpha=0.3) +
coord_cartesian(ylim = c(0, max(ylim))) +
labs(x = "", y = "Infected", color = "Model") +
scale_y_continuous(minor_breaks = seq(0, max(ylim), by = 10000)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
scale_color_manual(values = cols) +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))# create data for analysis
data = data.frame(date = COVID19$data,
y = COVID19$deceduti)
data$x = as.numeric(data$date) - min(as.numeric(data$date)) + 1
DT::datatable(data, options = list("pageLength" = 5))mod1_start = lm(log(y) ~ x, data = data)
b = unname(coef(mod1_start))
start = list(th1 = exp(b[1]), th2 = b[2])
exponential <- function(x, th1, th2) th1 * exp(th2 * x)
mod1 = nls(y ~ exponential(x, th1, th2), data = data, start = start)
summary(mod1)
##
## Formula: y ~ exponential(x, th1, th2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## th1 472.714054 69.681862 6.784 0.0000000296 ***
## th2 0.084798 0.003775 22.462 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1092 on 42 degrees of freedom
##
## Number of iterations to convergence: 11
## Achieved convergence tolerance: 0.000002829mod2 = nls(y ~ SSlogis(x, Asym, xmid, scal), data = data)
summary(mod2)
##
## Formula: y ~ SSlogis(x, Asym, xmid, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 19579.34504 228.11191 85.83 <2e-16 ***
## xmid 33.74339 0.17071 197.67 <2e-16 ***
## scal 5.62057 0.08452 66.50 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 146.1 on 41 degrees of freedom
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 0.0000005029mod3 = nls(y ~ SSgompertz(x, Asym, b2, b3), data = data)
# manually set starting values
# start = list(Asym = coef(mod2)[1])
# tmp = list(y = log(log(start$Asym) - log(data$y)), x = data$x)
# b = unname(coef(lm(y ~ x, data = tmp)))
# start = c(start, c(b2 = exp(b[1]), b3 = exp(b[2])))
# mod3 = nls(y ~ SSgompertz(x, Asym, b2, b3), data = data, start = start,
# control = nls.control(maxiter = 10000))
summary(mod3)
##
## Formula: y ~ SSgompertz(x, Asym, b2, b3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 28582.190126 571.205701 50.04 <2e-16 ***
## b2 13.401511 0.376627 35.58 <2e-16 ***
## b3 0.928034 0.001307 710.00 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 103.3 on 41 degrees of freedom
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 0.0000002748richards <- function(x, th1, th2, th3) th1*(1 - exp(-th2*x))^th3
Loss <- function(th, y, x) sum((y - richards(x, th[1], th[2], th[3]))^2)
start <- optim(par = c(coef(mod2)[1], 0.001, 1), fn = Loss,
y = data$y, x = data$x)$par
names(start) <- c("th1", "th2", "th3")
mod4 = nls(y ~ richards(x, th1, th2, th3), data = data, start = start,
# trace = TRUE, algorithm = "port",
control = nls.control(maxiter = 1000))
summary(mod4)
##
## Formula: y ~ richards(x, th1, th2, th3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## th1 30999.994563 1024.197584 30.27 <2e-16 ***
## th2 0.064717 0.002202 29.38 <2e-16 ***
## th3 9.709261 0.477609 20.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 127.6 on 41 degrees of freedom
##
## Number of iterations to convergence: 2
## Achieved convergence tolerance: 0.000001331models = list("Exponential model" = mod1,
"Logistic model" = mod2,
"Gompertz model" = mod3,
"Richards model" = mod4)
tab = data.frame(loglik = sapply(models, logLik),
df = sapply(models, function(m) attr(logLik(m), "df")),
Rsquare = sapply(models, function(m)
cor(data$y, fitted(m))^2),
AIC = sapply(models, AIC),
AICc = sapply(models, AICc),
BIC = sapply(models, BIC))
sel <- apply(tab[,4:6], 2, which.min)
tab$"" <- sapply(tabulate(sel, nbins = length(models))+1, symnum,
cutpoints = 0:4, symbols = c("", "*", "**", "***"))
knitr::kable(tab)| loglik | df | Rsquare | AIC | AICc | BIC | ||
|---|---|---|---|---|---|---|---|
| Exponential model | -369.2251 | 3 | 0.9704563 | 744.4502 | 745.0502 | 749.8027 | |
| Logistic model | -280.1774 | 4 | 0.9995129 | 568.3548 | 569.3805 | 575.4916 | |
| Gompertz model | -264.9186 | 4 | 0.9997137 | 537.8372 | 538.8628 | 544.9739 | *** |
| Richards model | -274.2330 | 4 | 0.9995791 | 556.4661 | 557.4917 | 563.6028 |
ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(aes(y = fitted(mod1), color = "Exponential")) +
geom_line(aes(y = fitted(mod2), color = "Logistic")) +
geom_line(aes(y = fitted(mod3), color = "Gompertz")) +
geom_line(aes(y = fitted(mod4), color = "Richards")) +
labs(x = "", y = "Deceased", color = "Model") +
scale_color_manual(values = cols) +
scale_y_continuous(breaks = seq(0, coef(mod2)[1], by = 1000),
minor_breaks = seq(0, coef(mod2)[1], by = 500)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))last_plot() +
scale_y_continuous(trans = "log10", limits = c(10,NA)) +
labs(y = "Deceased (log10 scale)")df = data.frame(x = seq(min(data$x), max(data$x)+14))
df = cbind(df, date = as.Date(df$x, origin = data$date[1]-1),
fit1 = predict(mod1, newdata = df),
fit2 = predict(mod2, newdata = df),
fit3 = predict(mod3, newdata = df),
fit4 = predict(mod4, newdata = df))
ylim = c(0, max(df[,-(1:3)]))ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(data = df, aes(x = date, y = fit1, color = "Exponential")) +
geom_line(data = df, aes(x = date, y = fit2, color = "Logistic")) +
geom_line(data = df, aes(x = date, y = fit3, color = "Gompertz")) +
geom_line(data = df, aes(x = date, y = fit4, color = "Richards")) +
coord_cartesian(ylim = ylim) +
labs(x = "", y = "Deceased", color = "Model") +
scale_y_continuous(breaks = seq(0, max(ylim), by = 1000),
minor_breaks = seq(0, max(ylim), by = 1000)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
scale_color_manual(values = cols) +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))# compute prediction using Moving Block Bootstrap (MBB) for nls
df = data.frame(x = seq(min(data$x), max(data$x)+14))
df = cbind(df, date = as.Date(df$x, origin = data$date[1]-1))
pred1 = cbind(df, "fit" = predict(mod1, newdata = df))
pred1[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod1, df[df$x > max(data$x),])[,2:3]
pred2 = cbind(df, "fit" = predict(mod2, newdata = df))
pred2[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod2, df[df$x > max(data$x),])[,2:3]
pred3 = cbind(df, "fit" = predict(mod3, newdata = df))
pred3[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod3, df[df$x > max(data$x),])[,2:3]
pred4 = cbind(df, "fit" = predict(mod4, newdata = df))
pred4[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod4, df[df$x > max(data$x),])[,2:3]
# predictions for next day
pred = rbind(subset(pred1, x == max(data$x)+1, select = 2:5),
subset(pred2, x == max(data$x)+1, select = 2:5),
subset(pred3, x == max(data$x)+1, select = 2:5),
subset(pred4, x == max(data$x)+1, select = 2:5))
print(pred, digits = 3)
## date fit lwr upr
## 45 2020-04-08 21470 18322 24945
## 451 2020-04-08 17251 16819 17576
## 452 2020-04-08 17952 17705 18257
## 453 2020-04-08 18018 17705 18424
ylim = c(0, max(pred2$upr, pred3$upr, na.rm=TRUE))ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(data = pred1, aes(x = date, y = fit, color = "Exponential")) +
geom_line(data = pred2, aes(x = date, y = fit, color = "Logistic")) +
geom_line(data = pred3, aes(x = date, y = fit, color = "Gompertz")) +
geom_line(data = pred4, aes(x = date, y = fit, color = "Richards")) +
geom_ribbon(data = pred1, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[1], alpha=0.3) +
geom_ribbon(data = pred2, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[2], alpha=0.3) +
geom_ribbon(data = pred3, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[3], alpha=0.3) +
geom_ribbon(data = pred4, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[4], alpha=0.3) +
coord_cartesian(ylim = c(0, max(ylim))) +
labs(x = "", y = "Deceased", color = "Model") +
scale_y_continuous(minor_breaks = seq(0, max(ylim), by = 1000)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
scale_color_manual(values = cols) +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))# create data for analysis
data = data.frame(date = COVID19$data,
y = COVID19$dimessi_guariti)
data$x = as.numeric(data$date) - min(as.numeric(data$date)) + 1
DT::datatable(data, options = list("pageLength" = 5))mod1_start = lm(log(y) ~ x, data = data)
b = unname(coef(mod1_start))
start = list(th1 = exp(b[1]), th2 = b[2])
exponential <- function(x, th1, th2) th1 * exp(th2 * x)
mod1 = nls(y ~ exponential(x, th1, th2), data = data, start = start)
summary(mod1)
##
## Formula: y ~ exponential(x, th1, th2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## th1 516.642923 60.844938 8.491 0.000000000117 ***
## th2 0.089825 0.002995 29.990 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1091 on 42 degrees of freedom
##
## Number of iterations to convergence: 11
## Achieved convergence tolerance: 0.00000268mod2 = nls(y ~ SSlogis(x, Asym, xmid, scal), data = data)
summary(mod2)
##
## Formula: y ~ SSlogis(x, Asym, xmid, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 32016.1468 839.6911 38.13 <2e-16 ***
## xmid 37.0064 0.3864 95.77 <2e-16 ***
## scal 6.4455 0.1424 45.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 277.4 on 41 degrees of freedom
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 0.000006546mod3 = nls(y ~ SSgompertz(x, Asym, b2, b3), data = data)
summary(mod3)
##
## Formula: y ~ SSgompertz(x, Asym, b2, b3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 64713.687529 3204.047565 20.20 <2e-16 ***
## b2 10.054874 0.250256 40.18 <2e-16 ***
## b3 0.948343 0.001535 617.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 176.2 on 41 degrees of freedom
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 0.000001595richards <- function(x, th1, th2, th3) th1*(1 - exp(-th2*x))^th3
Loss <- function(th, y, x) sum((y - richards(x, th[1], th[2], th[3]))^2)
start <- optim(par = c(coef(mod2)[1], 0.001, 1), fn = Loss,
y = data$y, x = data$x)$par
names(start) <- c("th1", "th2", "th3")
mod4 = nls(y ~ richards(x, th1, th2, th3), data = data, start = start,
# trace = TRUE, # algorithm = "port",
control = nls.control(maxiter = 1000))
summary(mod4)
##
## Formula: y ~ richards(x, th1, th2, th3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## th1 94282.651221 10289.484765 9.163 0.0000000000179 ***
## th2 0.035483 0.002668 13.298 < 2e-16 ***
## th3 5.719437 0.312529 18.300 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 194.4 on 41 degrees of freedom
##
## Number of iterations to convergence: 11
## Achieved convergence tolerance: 0.0000009866models = list("Exponential model" = mod1,
"Logistic model" = mod2,
"Gompertz model" = mod3,
"Richards model" = mod4)
tab = data.frame(loglik = sapply(models, logLik),
df = sapply(models, function(m) attr(logLik(m), "df")),
Rsquare = sapply(models, function(m)
cor(data$y, fitted(m))^2),
AIC = sapply(models, AIC),
AICc = sapply(models, AICc),
BIC = sapply(models, BIC))
sel <- apply(tab[,4:6], 2, which.min)
tab$"" <- sapply(tabulate(sel, nbins = length(models))+1, symnum,
cutpoints = 0:4, symbols = c("", "*", "**", "***"))
knitr::kable(tab)| loglik | df | Rsquare | AIC | AICc | BIC | ||
|---|---|---|---|---|---|---|---|
| Exponential model | -369.1897 | 3 | 0.9839216 | 744.3794 | 744.9794 | 749.7319 | |
| Logistic model | -308.3937 | 4 | 0.9988963 | 624.7875 | 625.8131 | 631.9243 | |
| Gompertz model | -288.4324 | 4 | 0.9994975 | 584.8648 | 585.8904 | 592.0015 | *** |
| Richards model | -292.7633 | 4 | 0.9994170 | 593.5266 | 594.5523 | 600.6634 |
ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(aes(y = fitted(mod1), color = "Exponential")) +
geom_line(aes(y = fitted(mod2), color = "Logistic")) +
geom_line(aes(y = fitted(mod3), color = "Gompertz")) +
geom_line(aes(y = fitted(mod4), color = "Richards")) +
labs(x = "", y = "Recovered", color = "Model") +
scale_color_manual(values = cols) +
scale_y_continuous(breaks = seq(0, coef(mod2)[1], by = 1000),
minor_breaks = seq(0, coef(mod2)[1], by = 500)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))last_plot() +
scale_y_continuous(trans = "log10", limits = c(10,NA)) +
labs(y = "Recovered (log10 scale)")df = data.frame(x = seq(min(data$x), max(data$x)+14))
df = cbind(df, date = as.Date(df$x, origin = data$date[1]-1),
fit1 = predict(mod1, newdata = df),
fit2 = predict(mod2, newdata = df),
fit3 = predict(mod3, newdata = df),
fit4 = predict(mod4, newdata = df))
ylim = c(0, max(df[,-(1:3)]))ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(data = df, aes(x = date, y = fit1, color = "Exponential")) +
geom_line(data = df, aes(x = date, y = fit2, color = "Logistic")) +
geom_line(data = df, aes(x = date, y = fit3, color = "Gompertz")) +
geom_line(data = df, aes(x = date, y = fit4, color = "Richards")) +
coord_cartesian(ylim = ylim) +
labs(x = "", y = "Recovered", color = "Model") +
scale_y_continuous(breaks = seq(0, max(ylim), by = 1000),
minor_breaks = seq(0, max(ylim), by = 1000)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
scale_color_manual(values = cols) +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))# compute prediction using Moving Block Bootstrap (MBB) for nls
df = data.frame(x = seq(min(data$x), max(data$x)+14))
df = cbind(df, date = as.Date(df$x, origin = data$date[1]-1))
pred1 = cbind(df, "fit" = predict(mod1, newdata = df))
pred1[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod1, df[df$x > max(data$x),])[,2:3]
pred2 = cbind(df, "fit" = predict(mod2, newdata = df))
pred2[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod2, df[df$x > max(data$x),])[,2:3]
pred3 = cbind(df, "fit" = predict(mod3, newdata = df))
pred3[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod3, df[df$x > max(data$x),])[,2:3]
pred4 = cbind(df, "fit" = predict(mod4, newdata = df))
pred4[df$x > max(data$x), c("lwr", "upr")] = predictMBB.nls(mod4, df[df$x > max(data$x),])[,2:3]
# predictions for next day
pred = rbind(subset(pred1, x == max(data$x)+1, select = 2:5),
subset(pred2, x == max(data$x)+1, select = 2:5),
subset(pred3, x == max(data$x)+1, select = 2:5),
subset(pred4, x == max(data$x)+1, select = 2:5))
print(pred, digits = 3)
## date fit lwr upr
## 45 2020-04-08 29421 26360 32782
## 451 2020-04-08 24832 23930 25674
## 452 2020-04-08 25678 25223 26140
## 453 2020-04-08 25836 25370 26380
ylim = c(0, max(pred2$upr, pred3$upr, na.rm=TRUE))ggplot(data, aes(x = date, y = y)) +
geom_point() +
geom_line(data = pred1, aes(x = date, y = fit, color = "Exponential")) +
geom_line(data = pred2, aes(x = date, y = fit, color = "Logistic")) +
geom_line(data = pred3, aes(x = date, y = fit, color = "Gompertz")) +
geom_line(data = pred4, aes(x = date, y = fit, color = "Richards")) +
geom_ribbon(data = pred1, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[1], alpha=0.3) +
geom_ribbon(data = pred2, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[2], alpha=0.3) +
geom_ribbon(data = pred3, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[3], alpha=0.3) +
geom_ribbon(data = pred4, aes(x = date, ymin = lwr, ymax = upr),
inherit.aes = FALSE, fill = cols[4], alpha=0.3) +
coord_cartesian(ylim = c(0, max(ylim))) +
labs(x = "", y = "Recovered", color = "Model") +
scale_y_continuous(breaks = seq(0, max(ylim), by = 5000),
minor_breaks = seq(0, max(ylim), by = 1000)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
scale_color_manual(values = cols) +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))df = data.frame(date = COVID19$data,
positives = c(NA, diff(COVID19$totale_casi)),
swabs = c(NA, diff(COVID19$tamponi)))
df$x = as.numeric(df$date) - min(as.numeric(df$date)) + 1
# df$y = df$positives/df$swabs
df$y = df$positives/c(NA, zoo::rollmean(df$swabs, 2))
df = subset(df, swabs > 50)
# DT::datatable(df[,-4], )ggplot(df, aes(x = date)) +
geom_point(aes(y = swabs, color = "swabs"), pch = 19) +
geom_line(aes(y = swabs, color = "swabs")) +
geom_point(aes(y = positives, color = "positives"), pch = 0) +
geom_line(aes(y = positives, color = "positives")) +
labs(x = "", y = "Number of cases", color = "") +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
scale_color_manual(values = palette()[c(2,1)]) +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))ggplot(df, aes(x = date, y = y)) +
geom_smooth(method = "loess", se = TRUE, col = "black") +
geom_point(col=palette()[4]) +
geom_line(size = 0.5, col=palette()[4]) +
labs(x = "", y = "% positives among admnistered swabs (two-day rolling mean)") +
scale_y_continuous(labels = scales::percent_format(),
breaks = seq(0, 0.5, by = 0.05)) +
coord_cartesian(ylim = c(0,max(df$y, na.rm = TRUE))) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))df = data.frame(date = COVID19$data,
hospital = c(NA, diff(COVID19$totale_ospedalizzati)),
icu = c(NA, diff(COVID19$terapia_intensiva)))
df$x = as.numeric(df$date) - min(as.numeric(df$date)) + 1ggplot(df, aes(x = date, y = hospital)) +
geom_smooth(method = "loess", se = TRUE, col = "black") +
geom_point(col = "orange") +
geom_line(size = 0.5, col = "orange") +
labs(x = "", y = "Change hospitalized patients") +
coord_cartesian(ylim = range(df$hospital, na.rm = TRUE)) +
scale_y_continuous(minor_breaks = seq(min(df$hospital, na.rm = TRUE),
max(df$hospital, na.rm = TRUE),
by = 100)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))ggplot(df, aes(x = date, y = icu)) +
geom_smooth(method = "loess", se = TRUE, col = "black") +
geom_point(col = "red2") +
geom_line(size = 0.5, col = "red2") +
labs(x = "", y = "Change ICU patients") +
coord_cartesian(ylim = range(df$icu, na.rm = TRUE)) +
scale_y_continuous(minor_breaks = seq(min(df$icu, na.rm = TRUE),
max(df$icu, na.rm = TRUE),
by = 10)) +
scale_x_date(date_breaks = "2 day", date_labels = "%b%d",
minor_breaks = "1 day") +
theme_bw() +
theme(legend.position = "top",
axis.text.x = element_text(angle=60, hjust=1))